# Functions
# Plot color palette
plot_color_palette <- function(input_cols) {
col_data <- tibble(color = input_cols) %>%
mutate(color = fct_inorder(color))
res <- col_data %>%
ggplot(aes(x = "color", fill = color)) +
geom_bar() +
scale_fill_manual(values = input_cols) +
theme_void()
res
}
# Create color palette
create_gradient <- function(cols_in, n = NULL) {
if (is.null(n)) {
n <- length(cols_in)
}
colorRampPalette(cols_in)(n)
}
create_col_fun <- function(cols_in) {
function(n = NULL) {
create_gradient(cols_in, n)
}
}
# Pull items from list of vectors
pull_nest_vec <- function(list_in, idx) {
res <- map_chr(list_in, ~ .x[[idx]])
res
}
# Capitalize first character without modifying other characters
str_to_title_v2 <- function(string) {
cap_first_chr <- function(string) {
chrs <- string %>%
str_split(pattern = "") %>%
unlist()
if (any(chrs %in% LETTERS)) {
return(string)
}
chrs[1] <- str_to_upper(chrs[1])
res <- chrs %>%
reduce(str_c)
res
}
res <- string %>%
map_chr(~ {
.x %>%
str_split(pattern = " ") %>%
unlist() %>%
map_chr(cap_first_chr) %>%
reduce(str_c, sep = " ")
})
res
}
# Set colors
set_cols <- function(types_in, cols_in, other_cols) {
types_in <- types_in[!types_in %in% names(other_cols)]
cols_in <- cols_in[!cols_in %in% other_cols]
names(cols_in) <- types_in
cols_in <- cols_in[!is.na(names(cols_in))]
res <- c(cols_in, other_cols)
res
}
# Set subtype colors
set_type_cols <- function(type_in, sobjs_in, type_key, type_column = "subtype",
cols_in, other_cols) {
sobjs_in <- sobjs_in[type_key[names(sobjs_in)] == type_in]
res <- sobjs_in %>%
map(~ {
.x@meta.data %>%
pull(type_column) %>%
unique()
}) %>%
reduce(c) %>%
unique() %>%
set_cols(
cols_in = cols_in,
other_cols = other_cols
)
res
}
# Subset Seurat objects for plotting
subset_sobj <- function(sobj_in, name, type_vec, include_list, type_column = "cell_type1", subtype_column = "cell_type2",
clust_column = "cell_type3", data_column = "ova_fc", min_cells = 15, cap_names = T,
orig_idents = c("_1" = "CD45_neg", "_2" = "CD45_pos"),
type_filt = c("DC" = "CD45_pos", "LEC" = "CD45_neg", "fibroblast" = "CD45_neg"), ...) {
res <- sobj_in
# Pull info for cell_type from reference lists
type_in <- type_vec[[name]]
include_types <- include_list[[name]]
# Set cell type, subtype, and cluster columns in meta.data
res@meta.data <- res@meta.data %>%
rownames_to_column("cell_id") %>%
mutate(
cell_type = !!sym(type_column),
subtype = !!sym(subtype_column),
subtype_cluster = !!sym(clust_column)
) %>%
column_to_rownames("cell_id")
# Set orig.ident based on cell_id suffix
if (!is.null(orig_idents)) {
res@meta.data <- res@meta.data %>%
rownames_to_column("cell_id") %>%
mutate(
orig.ident = str_extract(cell_id, "_[0-9]+$"),
orig.ident = orig_idents[orig.ident]
) %>%
column_to_rownames("cell_id")
}
# Filter cells based on orig.ident
if (!is.null(type_filt)) {
res <- res %>%
subset(subset = orig.ident == type_filt[type_in])
}
# Filter cells based on input cell type
res <- res %>%
subset(subset = cell_type %in% c(type_in, include_types))
if (!type_in %in% pull(res@meta.data, cell_type)) {
stop(str_c("ERROR: Cell type not present after filtering for ", type_filt[type_in]))
}
# Filter cells based on number of cells per subtype
if (!is.null(min_cells)) {
keep_cells <- res@meta.data %>%
rownames_to_column("cell_id") %>%
group_by(subtype) %>%
filter(n_distinct(cell_id) > min_cells) %>%
pull(cell_id)
res <- res %>%
subset(cells = keep_cells)
}
# Score cell cycle genes
s.genes <- cc.genes$s.genes %>%
str_to_title()
g2m.genes <- cc.genes$g2m.genes %>%
str_to_title()
res <- res %>%
CellCycleScoring(
s.features = s.genes,
g2m.features = g2m.genes
)
# Re-process object
res <- res %>%
FindVariableFeatures(selection.method = "vst") %>%
ScaleData(vars.to.regress = c("S.Score", "G2M.Score")) %>%
RunPCA() %>%
RunUMAP(dims = 1:40) %>%
AddMetaData(FetchData(., c("PC_1", "PC_2"))) %>%
AddMetaData(FetchData(., c("UMAP_1", "UMAP_2")))
# Add pseudo count
if (0 %in% pull(res@meta.data, !!sym(data_column))) {
res@meta.data <- res@meta.data %>%
rownames_to_column("cell_id") %>%
mutate(
pseudo = ifelse(!!sym(data_column) > 0, !!sym(data_column), NA),
pseudo = min(pseudo, na.rm = T) * 0.5,
!!sym(data_column) := !!sym(data_column) + pseudo
) %>%
column_to_rownames("cell_id")
}
# Capitalize subtypes
res@meta.data <- res@meta.data %>%
rownames_to_column("cell_id") %>%
mutate(
subtype = str_to_title_v2(subtype),
subtype_cluster = str_to_title_v2(subtype_cluster)
) %>%
column_to_rownames("cell_id")
res
}
# Re-cluster and run clustify
run_clustifyr <- function(sobj_in, name, type_vec, resolution = 1.8, type_column = "cell_type",
subtype_column = "subtype", clust_column = "subtype_cluster") {
res <- sobj_in
type_in <- type_vec[[name]]
ref <- params$sobj_refs[[type_in]] %>%
here()
if (is_empty(ref)) {
return(res)
}
load(ref)
type_column <- sym(type_column)
subtype_column <- sym(subtype_column)
clust_column <- sym(clust_column)
ref <- ref %>%
basename() %>%
str_remove("\\.rda$")
res <- sobj_in %>%
FindNeighbors(
reduction = "pca",
dims = 1:40
) %>%
FindClusters(
resolution = resolution,
verbose = F
) %>%
clustify(
ref_mat = eval(parse(text = ref)),
cluster_col = "seurat_clusters"
)
res@meta.data <- res@meta.data %>%
rownames_to_column("cell_id") %>%
mutate(
!!subtype_column := if_else(!!type_column == type_in, type, !!type_column),
!!subtype_column := str_to_title_v2(!!subtype_column),
!!clust_column := if_else(!!type_column == type_in, str_c(seurat_clusters, "-", !!subtype_column), !!subtype_column)
) %>%
rename(
UMAP_1 = UMAP_1.x,
UMAP_2 = UMAP_2.x
) %>%
column_to_rownames("cell_id")
res
}
# Filter list of Seurat objects for patient, normalize and merge objects
merge_sobj <- function(sobj_list, sample_order = NULL) {
res <- merge(
x = sobj_list[[1]],
y = sobj_list[2:length(sobj_list)],
add.cell.ids = names(sobj_list)
) %>%
ScaleData(assay = "RNA") %>%
ScaleData(assay = "adt") %>%
FindVariableFeatures(assay = "RNA")
# Set sample order
res@meta.data <- res@meta.data %>%
rownames_to_column("cell_ids") %>%
mutate(orig.ident = fct_relevel(orig.ident, sample_order)) %>%
column_to_rownames("cell_ids")
res
}
# Fit gaussian mixture model for given signal
fit_GMM <- function(sobj_in, data_column = "adt_ovalbumin", data_slot = "counts", prob = 0.5, quiet = F) {
set.seed(42)
# Fit GMM for OVA signal
data_df <- sobj_in %>%
FetchData(data_column, slot = data_slot) %>%
rownames_to_column("cell_id") %>%
mutate(!!sym(data_column) := !!sym(data_column) + 1) %>%
column_to_rownames("cell_id")
quiet_EM <- quietly(~ normalmixEM(.))
if (!quiet) {
quiet_EM <- normalmixEM
}
mixmdl <- data_df %>%
pull(data_column) %>%
quiet_EM()
if (quiet) {
mixmdl <- mixmdl$result
}
# New column names
ova_names <- c("Low", "High")
comp_names <- c("comp.1", "comp.2")
if (mixmdl$mu[1] > mixmdl$mu[2]) {
ova_names <- rev(ova_names)
}
names(comp_names) <- ova_names
names(mixmdl$mu) <- ova_names
names(mixmdl$sigma) <- ova_names
names(mixmdl$lambda) <- ova_names
# Divide into OVA groups
res <- data.frame(
cell_id = rownames(data_df),
data = data_df[, data_column],
mixmdl$posterior
) %>%
dplyr::rename(!!sym(data_column) := data) %>%
rename(all_of(comp_names)) %>%
mutate(GMM_grp = if_else(High >= prob, "High", "Low")) %>%
column_to_rownames("cell_id")
res <- list(
res = res,
mu = mixmdl$mu,
sigma = mixmdl$sigma,
lambda = mixmdl$lambda
)
res
}
# Classify cells based on OVA signal
classify_ova <- function(sobj_in, filt_column = "cell_type", filt = NULL, data_column = "adt_ovalbumin", data_slot = "counts", ...) {
# Filter Seurat object
sobj_filt <- sobj_in
if (!is.null(filt)) {
sobj_filt <- sobj_filt %>%
subset(!!sym(filt_column) == filt)
}
# Fit GMM
gmm_res <- sobj_filt %>%
fit_GMM(
data_column = data_column,
data_slot = data_slot,
...
)
gmm_df <- gmm_res$res %>%
rownames_to_column("cell_id") %>%
mutate(
mu = gmm_res$mu[GMM_grp],
GMM_grp = str_to_lower(GMM_grp),
GMM_grp = str_c("ova ", GMM_grp),
) %>%
column_to_rownames("cell_id")
# Add OVA groups to meta.data
res <- sobj_in %>%
AddMetaData(gmm_df)
res@meta.data <- res@meta.data %>%
rownames_to_column("cell_id") %>%
mutate(GMM_grp = if_else(is.na(GMM_grp), "Other", GMM_grp)) %>%
column_to_rownames("cell_id")
res
}
# Add distribution of GMM component to plot
add_stat_fun <- function(gmm_in, cols_in, key) {
# dnorm provides density for the normal distribution given the mean and
# standard deviation. Lambda is used to adjust for the mixture composition.
# mu: Mean of component
# sig: Standard deviation of component
# lam: Lambda of component (mixture weight)
plot_mix_comps <- function(x, mu, sigma, lam) {
lam * dnorm(x, mu, sigma)
}
stat_function(
geom = "line",
fun = plot_mix_comps,
args = list(gmm_in$mu[key], gmm_in$sigma[key], gmm_in$lambda[key]),
color = cols_in[key],
lwd = 1
)
}
# Overlay feature data on UMAP or tSNE
# Cannot change number of columns when using FeaturePlot with split.by
plot_features <- function(sobj_in, x = "UMAP_1", y = "UMAP_2", feature, data_slot = "data",
split_id = NULL, pt_size = 0.25, pt_outline = NULL, plot_cols = NULL,
feat_levels = NULL, split_levels = NULL, min_pct = NULL, max_pct = NULL,
calc_cor = F, lm_line = F, lab_size = 3.7, lab_pos = c(0.8, 0.9), ...) {
# Format imput data
counts <- sobj_in
if ("Seurat" %in% class(sobj_in)) {
vars <- c(x, y, feature)
if (!is.null(split_id)) {
vars <- c(vars, split_id)
}
counts <- sobj_in %>%
FetchData(vars = unique(vars), slot = data_slot) %>%
as_tibble(rownames = "cell_ids")
}
# Rename features
if (!is.null(names(feature))) {
counts <- counts %>%
rename(!!!syms(feature))
feature <- names(feature)
}
if (!is.null(names(x))) {
counts <- counts %>%
rename(!!!syms(x))
x <- names(x)
}
if (!is.null(names(y))) {
counts <- counts %>%
rename(!!!syms(y))
y <- names(y)
}
# Set min and max values for feature
if (!is.null(min_pct) || !is.null(max_pct)) {
counts <- counts %>%
mutate(
pct_rank = percent_rank(!!sym(feature)),
max_val = ifelse(pct_rank > max_pct, !!sym(feature), NA),
max_val = min(max_val, na.rm = T),
min_val = ifelse(pct_rank < min_pct, !!sym(feature), NA),
min_val = max(min_val, na.rm = T),
!!sym(feature) := if_else(!!sym(feature) > max_val, max_val, !!sym(feature)),
!!sym(feature) := if_else(!!sym(feature) < min_val, min_val, !!sym(feature))
)
}
# Set feature order
if (!is.null(feat_levels)) {
counts <- counts %>%
mutate(!!sym(feature) := fct_relevel(!!sym(feature), feat_levels))
}
# Set facet order
if (!is.null(split_id) && length(split_id) == 1) {
counts <- counts %>%
mutate(split_id = !!sym(split_id))
if (!is.null(split_levels)) {
counts <- counts %>%
mutate(split_id = fct_relevel(split_id, split_levels))
}
}
# Calculate correlation
if (calc_cor) {
if (!is.null(split_id)) {
counts <- counts %>%
group_by(split_id)
}
counts <- counts %>%
mutate(
cor_lab = cor(!!sym(x), !!sym(y)),
cor_lab = round(cor_lab, digits = 2),
cor_lab = str_c("r = ", cor_lab),
min_x = min(!!sym(x)),
max_x = max(!!sym(x)),
min_y = min(!!sym(y)),
max_y = max(!!sym(y)),
lab_x = (max_x - min_x) * lab_pos[1] + min_x,
lab_y = (max_y - min_y) * lab_pos[1] + min_y
)
}
# Create scatter plot
# To add outline for each cluster create separate layers
res <- counts %>%
arrange(!!sym(feature))
if (!is.null(pt_outline)) {
if (!is.numeric(counts[[feature]])) {
res <- res %>%
ggplot(aes(!!sym(x), !!sym(y), color = !!sym(feature), fill = !!sym(feature)))
feats <- counts[[feature]] %>%
unique()
if (!is.null(feat_levels)) {
feats <- feat_levels[feat_levels %in% feats]
}
for (feat in feats) {
f_counts <- counts %>%
filter(!!sym(feature) == feat)
res <- res +
geom_point(data = f_counts, aes(fill = !!sym(feature)), size = pt_outline, color = "black", show.legend = F) +
geom_point(data = f_counts, size = pt_size)
}
} else {
res <- res %>%
ggplot(aes(!!sym(x), !!sym(y), color = !!sym(feature))) +
geom_point(aes(fill = !!sym(feature)), size = pt_outline, color = "black", show.legend = F) +
geom_point(size = pt_size)
}
} else {
res <- res %>%
ggplot(aes(!!sym(x), !!sym(y), color = !!sym(feature))) +
geom_point(size = pt_size)
}
# Add regression line
if (lm_line) {
res <- res +
geom_smooth(method = "lm", se = F, color = "black", size = 0.5, linetype = 2)
}
# Add correlation coefficient label
if (calc_cor) {
res <- res +
geom_text(
aes(x = lab_x, lab_y, label = cor_lab),
color = "black",
size = lab_size,
check_overlap = T,
show.legend = F
)
}
# Set feature colors
if (!is.null(plot_cols)) {
if (is.numeric(counts[[feature]])) {
res <- res +
scale_color_gradientn(colors = plot_cols)
# scale_color_gradient(low = plot_cols[1], high = plot_cols[2])
} else {
res <- res +
scale_color_manual(values = plot_cols) +
scale_fill_manual(values = plot_cols)
}
}
# Split plot into facets
if (!is.null(split_id)) {
if (length(split_id) == 1) {
res <- res +
facet_wrap(~ split_id, ...)
} else if (length(split_id) == 2) {
eq <- str_c(split_id[1], " ~ ", split_id[2])
res <- res +
facet_grid(as.formula(eq), ...)
}
}
res
}
# Run gprofiler
run_gprofiler <- function(gene_list, genome = NULL, gmt_id = NULL, p_max = params$p_max_GO, GO_size = params$term_size,
intrsct_size = params$intrsct_size, order_query = params$order_query, dbases = c("GO:BP", "GO:MF", "KEGG"), ...) {
# Check for empty gene list
if (is_empty(gene_list)) {
return(as_tibble(NULL))
}
# Check arguments
if (is.null(genome) && is.null(gmt_id)) {
stop("ERROR: Must specifiy genome or gmt_id")
}
# Use gmt id
if (!is.null(gmt_id)) {
genome <- gmt_id
dbases <- NULL
}
# Run gProfileR
res <- gene_list %>%
gost(
organism = genome,
sources = dbases,
domain_scope = "annotated",
significant = T,
user_threshold = p_max,
ordered_query = order_query,
...
)
# Format and sort output data.frame
res <- as_tibble(res$result)
if (!is_empty(res)) {
res <- res %>%
filter(
term_size > GO_size,
intersection_size > intrsct_size
) %>%
arrange(source, p_value)
}
res
}
# Create GO bubble plot
create_bubbles <- function(GO_df, plot_colors = theme_cols[c(1:2, 4, 9)],
n_terms = 15) {
# Check for empty inputs
if (is_empty(GO_df) || nrow(GO_df) == 0) {
res <- ggplot() +
geom_blank()
return(res)
}
# Shorten GO terms and database names
GO_data <- GO_df %>%
mutate(
term_id = str_remove(term_id, "(GO|KEGG):"),
term_id = str_c(term_id, " ", term_name),
term_id = str_to_lower(term_id),
term_id = str_trunc(term_id, 40, "right"),
source = fct_recode(
source,
"Biological\nProcess" = "GO:BP",
"Cellular\nComponent" = "GO:CC",
"Molecular\nFunction" = "GO:MF",
"KEGG" = "KEGG"
)
)
# Reorder database names
plot_levels <- c(
"Biological\nProcess",
"Cellular\nComponent",
"Molecular\nFunction",
"KEGG"
)
GO_data <- GO_data %>%
mutate(source = fct_relevel(source, plot_levels))
# Extract top terms for each database
top_GO <- GO_data %>%
group_by(source) %>%
arrange(p_value) %>%
dplyr::slice(1:n_terms) %>%
ungroup()
# Create bubble plots
res <- GO_data %>%
ggplot(aes(1.25, -log10(p_value), size = intersection_size)) +
geom_point(color = plot_colors, alpha = 0.5, show.legend = T) +
geom_text_repel(
aes(2, -log10(p_value), label = term_id),
data = top_GO,
size = 2.3,
direction = "y",
hjust = 0,
segment.size = NA
) +
xlim(1, 8) +
labs(y = "-log10(p-value)") +
theme_info +
theme(
axis.title.x = element_blank(),
axis.text.x = element_blank(),
axis.ticks.x = element_blank()
) +
facet_wrap(~ source, scales = "free", nrow = 1)
res
}
# Plot percentage of cells in given groups
plot_cell_count <- function(sobj_in, group_id, split_id = NULL, group_order = NULL, fill_id,
plot_colors = theme_cols, x_lab = "Cell type", y_lab = "Fraction of cells",
bar_pos = "fill", order_count = T, bar_line = 0, ...) {
res <- sobj_in@meta.data %>%
rownames_to_column("cell_ids") %>%
mutate(
group_id := !!sym(group_id),
fill_id := !!sym(fill_id)
)
if (!is.null(group_order)) {
res <- res %>%
mutate(group_id = fct_relevel(group_id, group_order))
}
if (!is.null(split_id)) {
res <- res %>%
mutate(split_id := !!sym(split_id))
}
if (order_count) {
res <- res %>%
mutate(fill_id = fct_reorder(fill_id, cell_ids, n_distinct))
}
res <- res %>%
ggplot(aes(group_id, fill = fill_id)) +
geom_bar(position = bar_pos, size = bar_line, color = "black") +
scale_fill_manual(values = plot_colors) +
labs(x = x_lab, y = y_lab) +
theme_info
if (!is.null(split_id)) {
res <- res +
facet_wrap(~ split_id, ...)
}
res
}
# Find markers with presto
find_markers <- function(input_sobj, group_column = NULL, uniq = params$uniq_markers,
FC_min = params$FC_min, auc_min = params$auc_min, p_max = params$p_max_markers,
pct_in_min = params$pct_in_min, pct_out_max = params$pct_out_max, ...) {
res <- input_sobj %>%
wilcoxauc(group_by = group_column) %>%
as_tibble() %>%
filter(
padj < p_max,
logFC > FC_min,
auc > auc_min,
pct_in > pct_in_min,
pct_out < pct_out_max
) %>%
arrange(desc(logFC))
if (uniq) {
res <- res %>%
group_by(feature) %>%
filter(n() == 1) %>%
ungroup()
}
res
}
# Find cluster markers for each separate cell type
find_group_markers <- function(input_grp, input_sobj, grp_column, clust_column) {
res <- input_sobj %>%
subset(!!sym(grp_column) == input_grp)
clusts <- res@meta.data %>%
pull(clust_column)
if (n_distinct(clusts) < 2) {
return(NULL)
}
res <- res %>%
find_markers(group_column = clust_column) %>%
mutate(cell_type = input_grp)
res
}
# Create reference UMAP for comparisons
create_ref_umap <- function(input_sobj, pt_mtplyr = 1, pt_outline = NULL, color_guide, ...) {
if (is.null(pt_outline)) {
pt_outline <- 0.1 * pt_mtplyr + 0.3
}
res <- input_sobj %>%
plot_features(
pt_size = 0.1 * pt_mtplyr,
pt_outline = pt_outline,
...
) +
guides(color = color_guide) +
blank_theme +
theme(
legend.position = "top",
legend.title = element_blank(),
legend.text = element_text(size = 10)
)
res
}
# Create UMAPs showing marker gene signal
create_marker_umaps <- function(input_sobj, input_markers, umap_col = NULL, add_outline = NULL,
pt_mtplyr = 1, low_col = "#fafafa") {
# pt_size <- 0.25 * pt_mtplyr
pt_size <- 0.1 * pt_mtplyr
if (!is.null(umap_col)) {
input_markers <- set_names(
x = rep(umap_col, length(input_markers)),
nm = input_markers
)
}
res <- input_markers %>%
imap(~ {
input_sobj %>%
plot_features(
feature = .y,
plot_cols = c(low_col, .x),
pt_outline = add_outline,
pt_size = pt_size
) +
ggtitle(.y) +
blank_theme +
theme(
plot.title = element_text(size = 13),
legend.position = "bottom",
legend.title = element_blank(),
legend.text = element_text(size = 8),
legend.key.height = unit(0.1, "cm"),
legend.key.width = unit(0.3, "cm"),
axis.title.y = element_text(size = 13, color = "white"),
axis.text.y = element_text(size = 8, color = "white")
)
})
res
}
# Create boxplots showing marker gene signal
create_marker_boxes <- function(input_sobj, input_markers, clust_column, box_cols,
group = NULL, include_legend = F, all_boxes = F,
all_violins = F, order_boxes = T, n_boxes = 10,
n_rows = 2, pt_mtplyr = 1, exclude_clust = NULL, ...) {
# Retrieve and format data for boxplots
input_markers <- input_markers %>%
head(n_boxes)
box_data <- input_sobj %>%
FetchData(c(clust_column, input_markers)) %>%
as_tibble(rownames = "cell_id") %>%
filter(!(!!sym(clust_column) %in% exclude_clust)) %>%
mutate(grp = str_remove(!!sym(clust_column), "^[a-zA-Z0-9_]+-")) # This is specific for Shannon's subtype clusters
input_markers <- input_markers %>%
str_trunc(9)
# Filter based on input group
if (!is.null(group)) {
box_data <- box_data %>%
filter(grp == group)
}
# Format data for plots
box_data <- box_data %>%
pivot_longer(cols = c(-cell_id, -grp, -!!sym(clust_column)), names_to = "key", values_to = "Counts") %>%
mutate(
!!sym(clust_column) := fct_relevel(!!sym(clust_column), names(box_cols)),
key = str_trunc(key, width = 9, side = "right"),
key = fct_relevel(key, input_markers)
)
# Order boxes by mean signal
if (order_boxes) {
box_data <- box_data %>%
mutate(!!sym(clust_column) := fct_reorder(!!sym(clust_column), Counts, mean, .desc = T))
}
n_clust <- box_data %>%
pull(clust_column) %>%
n_distinct()
# Create plots
n_cols <- ceiling(n_boxes / n_rows)
res <- box_data %>%
ggplot(aes(!!sym(clust_column), Counts, color = !!sym(clust_column))) +
facet_wrap(~ key, ncol = n_cols) +
scale_color_manual(values = box_cols) +
theme_info +
theme(
panel.spacing.x = unit(0.7, "cm"),
strip.background = element_blank(),
strip.text = element_text(size = 13),
legend.position = "none",
axis.title.x = element_blank(),
axis.title.y = element_text(size = 13),
axis.text.x = element_blank(),
axis.text.y = element_text(size = 8),
axis.ticks.x = element_blank(),
axis.line.x = element_blank()
)
# Adjust output plot type
if (n_clust > 6 || all_boxes) {
res <- res +
geom_boxplot(aes(fill = !!sym(clust_column), color = !!sym(clust_column)), size = 0, outlier.color = "#f0f0f0", outlier.size = 0.25) +
stat_summary(fun = "median", geom = "point", shape = 22, size = 1, fill = "white") +
scale_fill_manual(values = box_cols) +
theme(...)
} else if (all_violins) {
res <- res +
geom_violin(aes(fill = !!sym(clust_column)), size = 0.2) +
stat_summary(fun = "median", geom = "point", shape = 22, size = 1, fill = "white") +
scale_fill_manual(values = box_cols) +
scale_color_manual(values = box_cols) +
theme(...)
} else {
pt_size <- 0.3 * pt_mtplyr
res <- res +
geom_quasirandom(size = pt_size) +
theme(...)
}
# Add legend
if (include_legend) {
res <- res +
guides(color = col_guide) +
theme(legend.position = "top")
}
# Add blank space for missing facets
n_keys <- n_distinct(box_data$key)
if (n_keys <= n_cols && n_rows > 1) {
n_keys <- if_else(n_keys == 1, 2, as.double(n_keys))
n_cols <- floor(n_cols / n_keys)
res <- res %>%
plot_grid(
ncol = n_cols,
nrow = 2
)
}
res
}
# Create figure summarizing marker genes
create_marker_fig <- function(input_sobj, input_markers, input_GO, clust_column,
input_umap, umap_color, fig_heights = c(0.46, 0.3, 0.3),
GO_genome = params$genome, box_colors, n_boxes = 10,
umap_outline = NULL, umap_mtplyr = 1, xlsx_name = NULL,
sheet_name = NULL, ...) {
marks_umap <- marks_boxes <- GO_bubbles <- ggplot() +
geom_blank() +
theme_void()
# Create UMAPs showing marker gene signal
if (nrow(input_markers) > 0) {
top_marks <- input_markers$feature %>%
head(n_boxes)
clust_legend <- get_legend(input_umap)
input_umap <- input_umap +
theme(legend.position = "none")
marks_umap <- input_sobj %>%
create_marker_umaps(
input_markers = head(top_marks, 7),
umap_col = umap_color,
add_outline = umap_outline,
pt_mtplyr = umap_mtplyr
) %>%
append(list(input_umap), .)
marks_umap <- plot_grid(
plotlist = marks_umap,
ncol = 4,
nrow = 2,
align = "vh",
axis = "trbl"
)
marks_umap <- plot_grid(
clust_legend, marks_umap,
rel_heights = c(0.2, 0.9),
nrow = 2
)
# Create boxplots showing marker gene signal
marks_boxes <- input_sobj %>%
create_marker_boxes(
input_markers = top_marks,
clust_column = clust_column,
box_cols = box_colors,
n_boxes = n_boxes,
plot.margin = unit(c(0.8, 0.2, 0.2, 0.2), "cm"),
...
)
# Create GO term plots
if (nrow(input_GO) > 0) {
GO_bubbles <- input_GO %>%
create_bubbles(plot_colors = umap_color) +
theme(
plot.margin = unit(c(0.8, 0.2, 0.2, 0.2), "cm"),
strip.background = element_blank(),
strip.text = element_text(size = 13),
axis.title.y = element_text(size = 13),
axis.text.y = element_text(size = 8),
axis.line.x = element_blank(),
legend.position = "bottom",
legend.title = element_blank(),
legend.text = element_text(size = 8)
)
# Write GO terms to excel file
if (!is.null(xlsx_name)) {
input_GO %>%
dplyr::select(
term_name, term_id,
source, effective_domain_size,
query_size, intersection_size,
p_value, significant
) %>%
arrange(source, p_value) %>%
write.xlsx(
file = str_c(xlsx_name, "_GO.xlsx"),
sheetName = sheet_name,
append = T
)
}
}
# Write markers to excel file
if (!is.null(xlsx_name)) {
input_markers %>%
write.xlsx(
file = str_c(xlsx_name, "_markers.xlsx"),
sheetName = sheet_name,
append = T
)
}
}
# Create final figure
res <- plot_grid(
marks_umap, marks_boxes, GO_bubbles,
rel_heights = fig_heights,
ncol = 1
)
res
}
# Filter clusters and set cluster order
set_cluster_order <- function(input_cols, input_marks, n_cutoff = 5) {
input_marks <- input_marks %>%
group_by(group) %>%
filter(n() >= n_cutoff) %>%
ungroup()
marks <- unique(input_marks$group)
res <- names(input_cols)
res <- res[res %in% marks]
res
}
# Create v1 panel for marker genes
create_marker_panel_v1 <- function(input_sobj, input_cols, input_umap = NULL, clust_column, order_boxes = T,
color_guide = guide_legend(override.aes = list(size = 3.5, shape = 16)),
uniq = params$uniq_GO, umap_mtplyr = 6, xlsx_name = NULL, exclude_clust = NULL,
...) {
# Set point size
# ref_mtplyr <- if_else(umap_mtplyr == 1, umap_mtplyr, umap_mtplyr * 2.5)
umap_mtplyr <- if_else(ncol(input_sobj) < 500, umap_mtplyr, 1)
ref_mtplyr <- umap_mtplyr
# Find marker genes
markers <- input_sobj %>%
find_markers(group_column = clust_column)
# Find GO terms
GO_df <- markers
if (nrow(markers) > 0) {
GO_df <- markers %>%
group_by(group) %>%
do({
arrange(., desc(logFC)) %>%
pull(feature) %>%
run_gprofiler(genome = params$genome)
}) %>%
ungroup()
}
if (uniq && nrow(GO_df) > 0) {
GO_df <- GO_df %>%
group_by(term_id) %>%
filter(n() == 1) %>%
ungroup()
}
# Set cluster order based on order of input_cols
fig_clusters <- input_cols %>%
set_cluster_order(markers)
fig_clusters <- fig_clusters[!fig_clusters %in% exclude_clust]
# Create figures
for (i in seq_along(fig_clusters)) {
cat("\n#### ", fig_clusters[i], "\n", sep = "")
# Filter markers and GO terms
clust <- fig_clusters[i]
fig_marks <- markers %>%
filter(group == clust)
fig_GO <- GO_df %>%
filter(group == clust)
# Create reference umap
ref_umap <- input_umap
umap_col <- input_cols[clust]
if (is.null(input_umap)) {
umap_levels <- input_cols[names(input_cols) != clust]
umap_levels <- names(c(umap_levels, umap_col))
ref_umap <- input_sobj %>%
create_ref_umap(
feature = clust_column,
plot_cols = input_cols,
feat_levels = umap_levels,
pt_mtplyr = ref_mtplyr,
color_guide = color_guide
)
}
# Create panel
marker_fig <- input_sobj %>%
create_marker_fig(
input_markers = fig_marks,
input_GO = fig_GO,
clust_column = clust_column,
input_umap = ref_umap,
umap_color = umap_col,
box_colors = input_cols,
order_boxes = order_boxes,
umap_mtplyr = umap_mtplyr,
xlsx_name = xlsx_name,
sheet_name = clust,
exclude_clust = exclude_clust,
...
)
cat(nrow(fig_marks), "marker genes and", nrow(fig_GO), "GO terms were identified.")
print(marker_fig)
cat("\n\n---\n\n<br>\n\n<br>\n\n")
}
}
# Create v2 panel that splits plots into groups
create_marker_panel_v2 <- function(input_sobj, input_markers, input_cols, grp_column, clust_column,
color_guide = guide_legend(override.aes = list(size = 3.5, shape = 16)),
uniq_GO = params$uniq_GO, umap_mtplyr = 6, xlsx_name = NULL, clust_regex = "^[a-zA-Z0-9_]+-",
...) {
# Set point size
# ref_mtplyr <- if_else(ncol(input_sobj) < 500, umap_mtplyr * 2.5, 1)
umap_mtplyr <- if_else(ncol(input_sobj) < 500, umap_mtplyr, 1)
ref_mtplyr <- umap_mtplyr
# Figure colors and order
fig_clusters <- input_cols %>%
set_cluster_order(input_markers)
# Find GO terms
GO_df <- input_markers %>%
group_by(group) %>%
do({
arrange(., desc(logFC)) %>%
pull(feature) %>%
run_gprofiler(genome = params$genome)
}) %>%
ungroup()
if (uniq_GO && nrow(GO_df) > 0) {
GO_df <- GO_df %>%
group_by(term_id) %>%
filter(n() == 1) %>%
ungroup()
}
# Create figures
for (i in seq_along(fig_clusters)) {
cat("\n#### ", fig_clusters[i], "\n", sep = "")
# Filter markers and GO terms
clust <- fig_clusters[i]
fig_marks <- input_markers %>%
filter(group == clust)
fig_GO <- GO_df %>%
filter(group == clust)
# Set colors
umap_col <- input_cols[clust]
group <- clust %>%
str_remove(clust_regex)
grp_regex <- str_c("-", group, "$") %>%
str_replace("\\+", "\\\\+") # include this to escape "+" in names
fig_cols <- input_cols[grepl(grp_regex, names(input_cols))]
fig_cols <- c( "Other" = "#fafafa", fig_cols)
ref_cols <- fig_cols[names(fig_cols) != clust]
ref_cols <- c(ref_cols, umap_col)
# Create reference UMAP
ref_umap <- input_sobj %>%
FetchData(c("UMAP_1", "UMAP_2", grp_column, clust_column)) %>%
as_tibble(rownames = "cell_id") %>%
mutate(!!sym(clust_column) := if_else(
!!sym(grp_column) != group,
"Other",
!!sym(clust_column)
)) %>%
create_ref_umap(
feature = clust_column,
plot_cols = ref_cols,
feat_levels = names(ref_cols),
pt_mtplyr = ref_mtplyr,
color_guide = color_guide
)
# Create panel
marker_fig <- input_sobj %>%
create_marker_fig(
input_markers = fig_marks,
input_GO = fig_GO,
clust_column = clust_column,
input_umap = ref_umap,
umap_color = umap_col,
box_colors = fig_cols,
group = group,
umap_mtplyr = umap_mtplyr,
xlsx_name = xlsx_name,
sheet_name = clust,
...
)
cat(nrow(fig_marks), "marker genes were identified.", nrow(fig_GO), "GO terms were identified.")
print(marker_fig)
cat("\n\n---\n\n<br>\n\n<br>\n\n")
}
}
# Load packages
R_packages <- c(
"tidyverse", "Seurat",
"gprofiler2", "knitr",
"cowplot", "ggbeeswarm",
"ggrepel", "RColorBrewer",
"xlsx", "colorblindr",
"ggforce", "broom",
"mixtools", "clustifyr",
"boot", "scales",
"patchwork", "ComplexHeatmap",
"devtools", "broom",
"presto", "here"
)
purrr::walk(R_packages, library, character.only = T)
# ggplot2 themes
theme_info <- theme_cowplot() +
theme(
plot.title = element_text(face = "plain", size = 20),
strip.background = element_blank(),
strip.text = element_text(face = "plain")
)
umap_theme <- theme_info +
theme(
axis.text = element_blank(),
axis.ticks = element_blank()
)
blank_theme <- umap_theme +
theme(
axis.line = element_blank(),
axis.title = element_blank()
)
# Legend guides
col_guide <- guide_legend(override.aes = list(size = 3.5, shape = 16))
outline_guide <- guide_legend(override.aes = list(
size = 3.5,
shape = 21,
color = "black",
stroke = 0.25
))
# Okabe Ito color palettes
ito_cols <- c(
palette_OkabeIto[1:4], "#d7301f",
palette_OkabeIto[5:6], "#6a51a3",
palette_OkabeIto[7:8]
)
ito_cols <- ito_cols %>%
darken(0.4) %>%
c(ito_cols, ., "#686868", "#000000")
# Set default palette
get_cols <- create_col_fun(ito_cols)
# OVA colors
ova_cols_1 <- c(
"ova high" = "#475C81",
"ova low" = "#B8ECFF",
"Other" = "#ffffff"
)
ova_cols_2 <- c(
"ova low" = "#B8ECFF",
"ova high" = "#475C81",
"Other" = "#ffffff"
)
ova_cols_3 <- c(
"Other" = "#ffffff",
"ova high" = "#475C81",
"ova low" = "#B8ECFF"
)
ova_cols_4 <- c(
"Other" = "#ffffff",
"ova low" = "#B8ECFF",
"ova high" = "#475C81"
)
# Feature colors
feat_cols <- c(
"#D7301F", "#0072B2",
"#009E73", "#4C3250",
"#E69F00", "#875C04"
)
# Cell subtype palettes
common_cols <- c(
"Epithelial" = "#6a51a3",
"B Cell" = "#E69F00",
"T Cell" = "#009E73",
"Unassigned" = "#ffffff"
)
so_cols <- so_types %>%
map(
set_type_cols,
sobjs_in = sobjs,
type_key = so_types,
cols_in = get_cols(),
other_cols = common_cols
)# Add one to variable
plus_one <- function(x, n = 1) {
cmd <- str_c(x, " <<- ", x, " + ", n)
eval(parse(text = cmd))
eval(parse(text = x))
}
# Add arrows to axis
add_arrow_axis <- function(gg_in, fract = 0.1, ...) {
get_line_coords <- function(range_in, fract) {
mn <- range_in[1]
mx <- range_in[2]
dif <- mx - mn
res <- c(mn - (dif * 0.05), mn + (dif * fract))
res
}
x_coords <- ggplot_build(gg_in)$layout$panel_scales_x[[1]]$range$range %>%
get_line_coords(fract = fract)
y_coords <- ggplot_build(gg_in)$layout$panel_scales_y[[1]]$range$range %>%
get_line_coords(fract = fract)
res <- gg_in +
geom_segment(
x = x_coords[1],
xend = x_coords[2],
y = y_coords[1],
yend = y_coords[1],
size = 0.25,
color = "black",
linejoin = "bevel",
arrow = arrow(ends = "last", type = "open", length = unit(0.02, "npc")),
...
) +
geom_segment(
y = y_coords[1],
yend = y_coords[2],
x = x_coords[1],
xend = x_coords[1],
size = 0.25,
color = "black",
linejoin = "bevel",
arrow = arrow(ends = "last", type = "open", length = unit(0.02, "npc")),
...
) +
theme(axis.title = element_text(hjust = 0, size = 10))
res
}
# Set equal x-axis scales
equalize_x <- function(gg_list_in, log_tran = T, ...) {
set_lims <- function(gg_in, min_x, max_x, log_tran, ...) {
res <- gg_in +
coord_cartesian(xlim = c(min_x, max_x))
if (log_tran) {
res <- res +
scale_x_log10(labels = trans_format("log10", math_format(10^.x)), ...)
}
res
}
gg_ranges <- gg_list_in %>%
map(~ ggplot_build(.x)$layout$panel_scales_x[[1]]$range$range)
min_val <- gg_ranges %>%
map_dbl(~ .x[1]) %>%
min()
max_val <- gg_ranges %>%
map_dbl(~ .x[2]) %>%
max()
res <- gg_list_in %>%
map(
set_lims,
min_x = min_val,
max_x = max_val,
log_tran = log_tran,
...
)
res
}
# Create figure 3 panels
create_fig3 <- function(sobj_in, cols_in, subtype_column = "subtype", data_slot = "counts", ova_cols = c("#fafafa", "#d7301f"),
box_counts = c("Relative ova signal" = "ova_fc"), umap_counts = c("ova counts" = "adt_ovalbumin"),
plot_title = NULL, arrow_axis = F, pt_size = 0.1, pt_outline = 0.4, pt_size_2 = 0.3, pt_outline_2 = 0.5,
umap_cell_count = F, box_cell_count = T, control_types = c("B Cell", "T Cell"), ...) {
box_column <- umap_column <- "cell_type"
box_cols <- umap_cols <- cols_in
# Fetch plotting data
data_df <- sobj_in %>%
FetchData(c(subtype_column, box_counts, umap_counts, "UMAP_1", "UMAP_2"), slot = data_slot) %>%
as_tibble(rownames = "cell_id")
if (!is.null(names(box_counts))) {
data_df <- data_df %>%
rename(!!box_counts)
box_counts <- names(box_counts)
}
if (!is.null(names(umap_counts))) {
data_df <- data_df %>%
rename(!!umap_counts)
umap_counts <- names(umap_counts)
}
# Set subtype order
# Move select cell types to front of order
data_df <- data_df %>%
mutate(
cell_type = !!sym(subtype_column),
cell_type = fct_reorder(cell_type, !!sym(box_counts), median)
)
type_order <- levels(data_df$cell_type)
if (!is.null(control_types)) {
control_types <- control_types[control_types %in% type_order]
type_order <- type_order[!type_order %in% control_types]
type_order <- c(control_types, type_order)
}
# Count cells for each subtype
data_df <- data_df %>%
group_by(cell_type) %>%
mutate(cell_count = n_distinct(cell_id)) %>%
ungroup() %>%
mutate(cell_type = fct_relevel(cell_type, type_order)) %>%
arrange(cell_type) %>%
mutate(
cell_count = str_c(cell_type, "\n(n = ", cell_count, ")"),
cell_count = fct_inorder(cell_count)
)
# Set cell type colors
names(type_order) <- levels(data_df$cell_count)
cols_df <- tibble(
cell_type = type_order,
cell_count = names(type_order)
)
cols_df <- cols_df %>%
mutate(color = cols_in[cell_type])
# Subtype UMAP
if (umap_cell_count) {
umap_column <- "cell_count"
umap_cols <- setNames(cols_df$color, cols_df$cell_count)
}
umap <- data_df %>%
plot_features(
feature = umap_column,
pt_size = pt_size,
pt_outline = pt_outline,
plot_cols = umap_cols,
feat_levels = rev(names(umap_cols))
) +
guides(color = guide_legend(override.aes = list(size = 3.5))) +
ggtitle(plot_title) +
blank_theme +
theme(
plot.title = element_text(size = 12),
legend.position = "none"
) +
theme(...)
if (arrow_axis) {
umap <- add_arrow_axis(umap)
}
# OVA UMAP
if (!is.null(umap_counts)) {
ova_umap <- data_df %>%
plot_features(
feature = umap_counts,
plot_cols = ova_cols,
pt_size = pt_size_2,
pt_outline = pt_outline_2,
min_pct = 0.01,
max_pct = 0.99
) +
blank_theme +
theme(
plot.title = element_text(size = 10, hjust = 0.5),
legend.position = "right",
legend.key.width = unit(0.15, "cm"),
legend.key.height = unit(0.30, "cm"),
legend.title = element_text(size = 10),
legend.text = element_text(size = 10)
)
if (arrow_axis) {
ova_umap <- add_arrow_axis(ova_umap)
}
}
# OVA boxes
if (box_cell_count) {
box_column <- "cell_count"
box_cols <- setNames(cols_df$color, cols_df$cell_count)
}
boxes <- data_df %>%
ggplot(aes(!!sym(box_counts), !!sym(box_column), fill = !!sym(box_column))) +
geom_violin(size = 0.3, draw_quantiles = c(0.25, 0.75), alpha = 0.75) +
stat_summary(geom = "point", color = "black", fun = median) +
scale_color_manual(values = box_cols) +
scale_fill_manual(values = box_cols) +
theme_minimal_vgrid() +
theme(
legend.position = "none",
axis.title.y = element_blank(),
axis.title = element_text(size = 10),
axis.text = element_text(size = 10),
axis.ticks.x = element_line(size = 0.1),
panel.grid.major.x = element_line(size = 0.1)
)
res <- list(umap, boxes)
if (!is.null(umap_counts)) {
res <- append(res, list(ova_umap))
}
names(res) <- rep(plot_title, length(res))
res
}
# Create figure 4 panels
create_fig4 <- function(sobj_in, ova_cols, feats, feat_cols, ref_cols, pt_size = 0.00001, pt_outline = 0.4, gmm_filt = NULL,
low_col = c("white", "white"), sep_bar_labs = F, plot_boxes = T, median_pt = 1) {
# Theme elements
legd_guide <- guide_legend(override.aes = list(
size = 3.5,
shape = 21,
color = "black",
stroke = 0.25
))
text_theme <- theme(
axis.title = element_text(size = 10),
legend.text = element_text(size = 8),
axis.text = element_text(size = 8)
)
# Run GMM
if (!is.null(gmm_filt)) {
sobj_in <- sobj_in %>%
classify_ova(
filt_column = names(gmm_filt),
filt = gmm_filt,
quiet = T
)
}
# Data for OVA group UMAP
ova_order <- names(ova_cols)
data_df <- sobj_in@meta.data %>%
as_tibble(rownames = "cell_id") %>%
group_by(GMM_grp) %>%
mutate(cell_count = n_distinct(cell_id)) %>%
ungroup() %>%
mutate(GMM_grp = fct_relevel(GMM_grp, ova_order)) %>%
arrange(GMM_grp) %>%
mutate(
cell_count = str_c(GMM_grp, "\n(n = ", cell_count, ")"),
cell_count = fct_inorder(cell_count)
)
# Set OVA group colors
names(ova_order) <- levels(data_df$cell_count)
cols_df <- tibble(
grp = ova_order,
cell_count = names(ova_order)
)
cols_df <- cols_df %>%
mutate(color = ova_cols[grp])
umap_cols <- set_names(
x = cols_df$color,
nm = cols_df$cell_count
)
ova_guide <- legd_guide
ova_guide$reverse <- T
# Create OVA group UMAP
ova_grp_umap <- data_df %>%
plot_features(
feature = "cell_count",
pt_size = pt_size,
pt_outline = pt_outline,
plot_cols = umap_cols,
feat_levels = names(ova_order)
) +
guides(color = ova_guide, fill = ova_guide) +
text_theme +
blank_theme +
theme(
plot.margin = unit(c(0.2, 1, 0.2, 1.5), "cm"),
legend.position = "top",
legend.title = element_blank(),
legend.text = element_text(size = 8)
)
# OVA hist
ova_hist <- sobj_in@meta.data %>%
filter(GMM_grp != "Other") %>%
mutate(GMM_grp = fct_relevel(GMM_grp, c("ova low", "ova high"))) %>%
ggplot(aes(adt_ovalbumin, after_stat(density), fill = GMM_grp)) +
stat_density(geom = "point", position = "identity", size = 0, color = "white") +
geom_density(fill = "white", color = "white", size = 0.3, alpha = 0.9) +
geom_density(size = 0.3, alpha = 0.8, show.legend = F) +
geom_vline(aes(xintercept = mu), size = 0.5, linetype = 2, color = "grey35") +
coord_cartesian(ylim = c(0, 1.7)) +
scale_fill_manual(values = ova_cols) +
scale_x_log10(labels = trans_format("log10", math_format(10^.x))) +
labs(x = "ova counts", y = "Density") +
guides(fill = guide_legend(override.aes = list(shape = 22, size = 3.5, stroke = 0.25, color = "black"))) +
theme_minimal_hgrid() +
text_theme +
theme(
plot.margin = unit(c(0.2, 0.5, 0.2, 0.2), "cm"),
legend.position = c(0.05, 0.92),
legend.key.height = unit(0.15, "cm"),
legend.title = element_blank(),
axis.line.y = element_line(size = 0.5, color = "grey85"),
axis.ticks.y = element_line(size = 0.1),
panel.grid.major.y = element_line(size = 0.1)
)
# OVA subtype bar graphs
type_bar <- sobj_in@meta.data %>%
as_tibble(rownames = "cell_id") %>%
mutate(
subtype = fct_reorder(subtype, cell_id, n_distinct),
GMM_grp = fct_relevel(GMM_grp, c("ova low", "ova high", "Other"))
) %>%
ggplot(aes(GMM_grp, fill = subtype)) +
stat_count(position = "fill", geom = "point", size = 0, color = "white") +
geom_bar(position = "fill", size = 0.25, color = "black", show.legend = F) +
scale_fill_manual(values = ref_cols) +
scale_y_continuous(breaks = c(0, 0.5, 1)) +
guides(fill = guide_legend(ncol = 1, override.aes = list(shape = 22, size = 3.5, stroke = 0.25, color = "black"))) +
labs(y = "Fraction of cells") +
theme_minimal_hgrid() +
text_theme +
theme(
legend.title = element_blank(),
legend.key.height = unit(0.35, "cm"),
axis.title.x = element_blank(),
axis.line.y = element_line(size = 0.5, color = "grey85"),
axis.ticks.y = element_line(size = 0.1),
panel.grid.major.y = element_blank()
)
if (sep_bar_labs) {
type_bar <- type_bar +
theme(axis.text.x = element_text(hjust = c(0.9, 0.5, 0.1)))
}
# Reference UMAP
ref_umap <- sobj_in %>%
create_ref_umap(
feature = "subtype",
color_guide = legd_guide,
plot_cols = ref_cols,
pt_mtplyr = pt_size / 0.1,
pt_outline = pt_outline
) +
theme(
plot.margin = unit(c(0.2, 1.5, 0.2, 0.2), "cm"),
legend.position = "left",
legend.margin = margin(0.2, 0.2, 0.2, 1.5, "cm"),
legend.text = element_text(size = 8)
)
# Create list of feature UMAPs
feat_umaps <- sobj_in %>%
create_marker_umaps(
pt_mtplyr = pt_size / 0.1,
add_outline = pt_outline,
input_markers = feat_cols,
low_col = low_col
) %>%
map(~ {
.x +
guides(color = guide_colorbar(frame.colour = "black", frame.linewidth = 0.2)) +
theme(plot.title = element_text(size = 12))
})
# Top panel of feature UMAPs
top_umaps <- append(list(ref_umap), feat_umaps[1:2]) %>%
plot_grid(
plotlist = .,
rel_widths = c(1, 0.5, 0.5),
nrow = 1
)
# Bottom panel of feature UMAPs
bot_umaps <- feat_umaps[3:6] %>%
plot_grid(
plotlist = .,
nrow = 1,
align = "h",
axis = "tb"
)
# Final feature UMAP figure
feat_umaps <- plot_grid(
top_umaps, bot_umaps,
ncol = 1,
align = "vh",
axis = "trbl"
) +
theme(plot.margin = unit(c(1, 0.2, 1.5, 0.2), "cm"))
# Feature violin plots
box_data <- sobj_in %>%
FetchData(c(feats, "subtype", "GMM_grp")) %>%
as_tibble(rownames = "cell_id") %>%
pivot_longer(cols = c(-cell_id, -subtype, -GMM_grp)) %>%
mutate(type_name = str_c(subtype, "_", name)) %>%
group_by(type_name) %>%
mutate(up_qt = boxplot.stats(value)$stats[4]) %>%
ungroup() %>%
mutate(type_name = fct_reorder(type_name, up_qt, median, .desc = T)) %>%
# mutate(
# type_name = str_c(subtype, "_", name),
# type_name = fct_reorder(type_name, value, median, .desc = T)
# ) %>%
group_by(subtype, name) %>%
mutate(med = median(value)) %>%
ungroup() %>%
mutate(name = fct_reorder(name, med, max, .desc = T))
feat_boxes <- box_data %>%
ggplot(aes(type_name, value, fill = subtype)) +
facet_wrap(~ name, nrow = 1, scales = "free_x") +
scale_fill_manual(values = ref_cols) +
scale_color_manual(values = ref_cols) +
labs(y = "Counts") +
theme_minimal_hgrid() +
text_theme +
theme(
strip.text = element_text(size = 12),
legend.title = element_blank(),
legend.key.height = unit(0.35, "cm"),
axis.title.x = element_blank(),
axis.text.x = element_blank(),
axis.line.x = element_blank(),
axis.ticks.x = element_blank(),
axis.line.y = element_line(size = 0.5, color = "grey85"),
panel.grid.major.y = element_blank(),
panel.background = element_rect(fill = "#fafafa")
)
if (plot_boxes) {
feat_boxes <- feat_boxes +
stat_summary(geom = "point", shape = 22, fun = median, size = 0) +
stat_summary(geom = "point", shape = 22, fun = median, size = median_pt + 1, color = "black") +
geom_boxplot(
color = "white",
fill = "white",
alpha = 1,
size = 0.3,
width = 0.6,
outlier.colour = "white",
outlier.alpha = 1,
outlier.size = 0.1,
coef = 0
) +
geom_boxplot(
size = 0.3,
width = 0.6,
outlier.colour = "grey85",
outlier.alpha = 1,
outlier.size = 0.1,
show.legend = F,
coef = 0,
fatten = 0
) +
stat_summary(
aes(color = subtype),
geom = "point",
shape = 22,
fun = median,
size = median_pt,
stroke = 1,
fill = "white",
show.legend = F
) +
guides(fill = guide_legend(override.aes = list(size = 3.5, stroke = 0.25)))
} else {
feat_boxes <- feat_boxes +
geom_violin(size = 0.3, scale = "width") +
stat_summary(geom = "point", color = "black", alpha = 1, fun = median) +
theme(panel.spacing = unit(0.5, "cm"))
}
# Final top panel
top_lets <- letters
bot_lets <- c("", "d", "e")
if (!is.null(gmm_filt)) {
type_bar <- ggplot() +
theme_void()
top_lets <- c(letters[1:2], "")
bot_lets <- c("", "c", "d")
}
top <- plot_grid(
ova_grp_umap, ova_hist, type_bar,
rel_widths = c(0.86, 1, 0.9),
labels = top_lets,
label_fontface = "plain",
label_size = 18,
align = "h",
axis = "tb",
nrow = 1
)
# Final bottom panel
blank_gg <- ggplot() +
theme_void()
bot <- plot_grid(
feat_boxes, blank_gg,
rel_widths = c(1, if_else(plot_boxes, 0.45, 0.2))
)
# Create final figure
res <- plot_grid(
top, feat_umaps, feat_boxes,
rel_heights = c(0.55, 1.05, 0.35),
ncol = 1,
labels = bot_lets,
label_fontface = "plain",
label_size = 18
)
res
}
# Plot correlation
plot_corr <- function(sobj_in, x, y, feat, data_slot, cols_in, plot_title, ...) {
res <- sobj_in %>%
FetchData(c(x, y, feat), slot = data_slot) %>%
mutate(
!!sym(x) := log10(!!sym(x)),
!!sym(y) := log10(!!sym(y))
) %>%
filter(
!!sym(x) != -Inf,
!!sym(y) != -Inf
) %>%
plot_features(
x = x,
y = y,
feature = feat,
data_slot = "counts",
plot_cols = cols_in,
lab_pos = c(0.9, 1),
lab_size = 5,
calc_cor = T,
lm_line = T,
...
) +
ggtitle(plot_title) +
guides(color = guide_legend(override.aes = list(size = 3.5))) +
theme_info +
theme(legend.title = element_blank())
res
}
# Calculate pairwise p-values for gg objects
calc_p_vals <- function(gg_in, sample_name, data_column, type_column, log_tran = T) {
# Pull data from gg object
gg_data <- gg_in$data
# Log transform
if (log_tran) {
gg_data <- gg_data %>%
mutate(!!sym(data_column) := log10(!!sym(data_column)))
}
# Calculate median
gg_stats <- gg_data %>%
group_by(!!sym(type_column)) %>%
summarize(med = median(!!sym(data_column)))
# Run wilcox test
gg_counts <- gg_data %>%
pull(data_column)
gg_groups <- gg_data %>%
pull(type_column)
res <- gg_counts %>%
pairwise.wilcox.test(
g = gg_groups,
p.adj = "bonf"
) %>%
tidy()
# Add medians to data.frame
res <- gg_stats %>%
rename(med_1 = med) %>%
right_join(res, by = c("cell_type" = "group1"))
res <- gg_stats %>%
rename(med_2 = med) %>%
right_join(res, by = c("cell_type" = "group2"))
# Format final table
res <- res %>%
mutate(Sample = sample_name) %>%
select(
Sample,
`Cell type 1` = str_c(type_column, ".y"),
`Median OVA FC 1 (log10)` = med_1,
`Cell type 2` = type_column,
`Median OVA FC 2 (log10)` = med_2,
p.value
)
res
}
# Parameter lists
fig3_names <- c("d2_DC", "d14_DC", "d14_LEC", "d14_FRC")
fig3_sobjs <- sobjs[fig3_names]
fig3_params <- list(
sobj_in = fig3_sobjs,
plot_title = so_titles[names(fig3_sobjs)],
cols_in = so_cols[names(fig3_sobjs)]
)
corr_params <- list(
sobj_in = sobjs,
plot_title = so_titles[names(sobjs)],
cols_in = so_cols[names(sobjs)]
)
n_plot <- 0# Create panel plots
gg_list <- fig3_params %>%
pmap(
create_fig3,
box_counts = c("Relative ova signal" = "ova_fc"),
umap_counts = c("ova counts" = "adt_ovalbumin"),
data_slot = "counts"
) %>%
flatten()
# Set violin scales equal
violin_idx <- seq_along(gg_list) %% 3 == 2
gg_list[violin_idx] <- gg_list[violin_idx] %>%
equalize_x(log_tran = T)
# Create table of p-values
if (!is.null(params$p_val_xlsx)) {
p_vals <- gg_list[violin_idx] %>%
imap(
calc_p_vals,
data_column = "Relative ova signal",
type_column = "cell_type",
log_tran = T
) %>%
bind_rows()
p_vals %>%
write.xlsx(here(params$p_val_xlsx))
}
# Create final figure
gg_list %>%
wrap_plots(ncol = 3, widths = c(1, 0.75, 1)) +
plot_annotation(tag_levels = "a") &
theme(
plot.margin = unit(c(1, 0.5, 0.5, 0.5), "cm"),
plot.tag = element_text(size = 24, face = "plain"),
plot.tag.position = c(-0.08, 1)
)Figures for all LEC timepoints
# Figure parameters
LEC_names <- c("d2_LEC", "d14_LEC")
LEC_sobjs <- sobjs[LEC_names]
LEC_params <- list(
sobj_in = LEC_sobjs,
plot_title = so_titles[names(LEC_sobjs)],
cols_in = so_cols[names(LEC_sobjs)],
pt_size = c(0.5, 0.1),
pt_outline = c(0.8, 0.4),
pt_size_2 = c(0.5, 0.3),
pt_outline_2 = c(0.8, 0.5)
)
# Create panel plots
gg_list <- LEC_params %>%
pmap(
create_fig3,
box_counts = c("Relative ova signal" = "ova_fc"),
umap_counts = c("ova counts" = "adt_ovalbumin"),
data_slot = "counts"
) %>%
flatten()
# Set violin scales equal
violin_idx <- seq_along(gg_list) %% 3 == 2
gg_list[violin_idx] <- gg_list[violin_idx] %>%
equalize_x(log_tran = T)
# Create final figure
gg_list %>%
wrap_plots(ncol = 3, widths = c(1, 0.75, 1)) +
plot_annotation(tag_levels = "a") &
theme(
plot.margin = unit(c(1, 0.5, 0.5, 0.5), "cm"),
plot.tag = element_text(size = 24, face = "plain"),
plot.tag.position = c(-0.08, 1)
)Figures for all FRC timepoints
# Figure parameters
FRC_names <- c("d2_FRC", "d14_FRC")
FRC_sobjs <- sobjs[FRC_names]
FRC_params <- list(
sobj_in = FRC_sobjs,
plot_title = so_titles[names(FRC_sobjs)],
cols_in = so_cols[names(FRC_sobjs)],
pt_size = c(1.5, 0.1),
pt_outline = c(2, 0.4),
pt_size_2 = c(1.5, 0.3),
pt_outline_2 = c(2, 0.5)
)
# Create panel plots
gg_list <- FRC_params %>%
pmap(
create_fig3,
box_counts = c("Relative ova signal" = "ova_fc"),
umap_counts = c("ova counts" = "adt_ovalbumin"),
data_slot = "counts"
) %>%
flatten()
# Set violin scales equal
violin_idx <- seq_along(gg_list) %% 3 == 2
gg_list[violin_idx] <- gg_list[violin_idx] %>%
equalize_x(log_tran = T)
# Create final figure
gg_list %>%
wrap_plots(ncol = 3, widths = c(1, 0.75, 1)) +
plot_annotation(tag_levels = "a") &
theme(
plot.margin = unit(c(1, 0.5, 0.5, 0.5), "cm"),
plot.tag = element_text(size = 24, face = "plain"),
plot.tag.position = c(-0.08, 1)
)LEC markers associated with high antigen counts. (a) LECs containing low and high antigen counts were identified using a gaussian mixture model. A UMAP projection is shown for ova-low and ova-high cells. T cells, B cells, and epithelial cells are shown in white (Other). (b) The distribution of ova antigen counts is shown for ova-low and ova-high cells. Dotted lines indicate the mean counts for each population. (c) The fraction of cells belonging to each LEC subtype is shown for ova-low and ova-high populations. (d) Gene expression counts were plotted for select genes associated with ova-high LECs. (e) The expression of ova-high LEC markers is shown for each LEC subtype.
# Features to plot
feats <- list(
c("Prox1", "Cavin1", "Cavin2", "Stab1", "Stab2", "Csf1"), # Beth's list, Csf1 is not diff expr in latest analysis
c("Mmrn1", "Fn1", "Prox1", "Nudt4", "Cldn5", "Nfat5"), # These other genes are top ova high markers
c("Tgfa", "Prss23", "Thbs1", "Sgk1", "Sema3d", "Psap"),
c("Selenop", "Tnc", "Ctsd", "Nts", "Cavin2", "Fxyd6"),
c("Cyr61", "Klf4", "Clca3a1", "Timp3", "Stab1", "Egfl7")
)
# Create final figure
so_name <- "d14_LEC"
plot_args <- list(
str_c("v", seq_along(feats)), # Tab title
feats # Features to plot
)
pwalk(plot_args, ~ {
umap_cols <- feat_cols[seq_along(..2)]
umap_cols <- set_names(umap_cols, ..2)
cat("\n### ", ..1, "\n", sep = "")
create_fig4(
sobj_in = sobjs[[so_name]],
ova_cols = ova_cols_1,
feats = names(umap_cols),
feat_cols = umap_cols,
ref_cols = so_cols[[so_name]]
) %>%
print()
cat("\n\n---\n\n<br>\n\n<br>\n\n")
})# Features to plot
feats <- list(
c("Prox1", "Cavin1", "Cavin2", "Stab1", "Stab2", "Csf1"), # Beth's list, Csf1 is not diff expr in latest analysis
c("Ccl2", "Tnc", "Nts", "Fn1", "S1pr1", "Pdlim1"), # These other genes are top ova high markers
c("Thbs1", "Marcks", "Stab1", "Mmrn1", "Fkbp1a", "Cd63"),
c("Degs1", "Ctsl", "Grn", "Ccnd1", "Procr", "Emp1"),
c("Klf4", "Selenop", "Ctla2a", "Psap", "Fabp4", "Pdpn")
)
# Create final figure
so_name <- "d2_LEC"
plot_args <- list(
str_c("v", seq_along(feats)), # Tab title
feats # Features to plot
)
pwalk(plot_args, ~ {
umap_cols <- feat_cols[seq_along(..2)]
umap_cols <- set_names(umap_cols, ..2)
cat("\n### ", ..1, "\n", sep = "")
create_fig4(
sobj_in = sobjs[[so_name]],
ova_cols = ova_cols_1,
feats = names(umap_cols),
feat_cols = umap_cols,
ref_cols = so_cols[[so_name]],
sep_bar_labs = T,
low_col = "white"
) %>%
print()
cat("\n\n---\n\n<br>\n\n<br>\n\n")
})# Features to plot
# cDC2 tbet-: Ccl2, Cxcl12, Msr1, Pkm, Gal3, Aif1, Mif
# CCR7hi migratory cDC2: cdc42ep3, EBI2, Malt1, Jak2
# cDC1: Ddx3, Skil, Nrp2
# CCR7hi migratory cDC1: Sema7a, PhiP, CD53, CD81, CD9, Ddx6
# Some genes not found: Gal3, EBI2, Ddx3
feats <- list(
c("Ccl2", "Mif", "Msr1", "Pkm", "Lgals3", "Aif1"),
c("Ccl2", "Cxcl12", "Msr1", "Pkm", "Lgals3", "Aif1")
)
# Create final figure
so_name <- "d2_DC"
gmm_filt <- c(subtype = "cDC2 Tbet-")
plot_args <- list(
str_c("v", seq_along(feats)), # Tab title
feats # Features to plot
)
pwalk(plot_args, ~ {
umap_cols <- feat_cols[seq_along(..2)]
umap_cols <- set_names(umap_cols, ..2)
cat("\n### ", ..1, "\n", sep = "")
create_fig4(
sobj_in = sobjs[[so_name]],
gmm_filt = gmm_filt,
ova_cols = ova_cols_4,
feats = names(umap_cols),
feat_cols = umap_cols,
ref_cols = so_cols[[so_name]],
low_col = c("white", "#fafafa"),
sep_bar_labs = T
) %>%
print()
cat("\n\n---\n\n<br>\n\n<br>\n\n")
})# Features to plot
# cDC2 tbet-: Ccl2, Cxcl12, Msr1, Pkm, Gal3, Aif1, Mif
# CCR7hi migratory cDC2: cdc42ep3, EBI2, Malt1, Jak2
# cDC1: Ddx3, Skil, Nrp2
# CCR7hi migratory cDC1: Sema7a, PhiP, CD53, CD81, CD9, Ddx6
feats <- list(
c("Cdc42ep3", "Malt1", "Jak2", "Gpr183", "Tpm1", "Aplp2"),
c("Cdc42ep3", "Malt1", "Jak2", "Tpm1", "Arl5a", "Aplp2")
)
# Create final figure
so_name <- "d2_DC"
gmm_filt <- c(subtype = "CCR7hi Mig cDC2")
plot_args <- list(
str_c("v", seq_along(feats)), # Tab title
feats # Features to plot
)
pwalk(plot_args, ~ {
umap_cols <- feat_cols[seq_along(..2)]
umap_cols <- set_names(umap_cols, ..2)
cat("\n### ", ..1, "\n", sep = "")
create_fig4(
sobj_in = sobjs[[so_name]],
gmm_filt = gmm_filt,
ova_cols = ova_cols_4,
feats = names(umap_cols),
feat_cols = umap_cols,
ref_cols = so_cols[[so_name]],
low_col = c("white", "#fafafa"),
sep_bar_labs = T
) %>%
print()
cat("\n\n---\n\n<br>\n\n<br>\n\n")
})# Features to plot
# cDC2 tbet-: Ccl2, Cxcl12, Msr1, Pkm, Gal3, Aif1, Mif
# CCR7hi migratory cDC2: cdc42ep3, EBI2, Malt1, Jak2
# cDC1: Ddx3, Skil, Nrp2
# CCR7hi migratory cDC1: Sema7a, PhiP, CD53, CD81, CD9, Ddx6
feats <- list(
c("Skil", "Ddx3x", "Nrp2", "Fscn1", "Tmem123", "Serpinb9")
)
# Create final figure
so_name <- "d2_DC"
gmm_filt <- c(subtype = "cDC1")
plot_args <- list(
str_c("v", seq_along(feats)), # Tab title
feats # Features to plot
)
pwalk(plot_args, ~ {
umap_cols <- feat_cols[seq_along(..2)]
umap_cols <- set_names(umap_cols, ..2)
cat("\n### ", ..1, "\n", sep = "")
create_fig4(
sobj_in = sobjs[[so_name]],
gmm_filt = gmm_filt,
ova_cols = ova_cols_3,
feats = names(umap_cols),
feat_cols = umap_cols,
ref_cols = so_cols[[so_name]],
low_col = c("white", "#fafafa"),
sep_bar_labs = T
) %>%
print()
cat("\n\n---\n\n<br>\n\n<br>\n\n")
})# Features to plot
# cDC2 tbet-: Ccl2, Cxcl12, Msr1, Pkm, Gal3, Aif1, Mif
# CCR7hi migratory cDC2: cdc42ep3, EBI2, Malt1, Jak2
# cDC1: Ddx3, Skil, Nrp2
# CCR7hi migratory cDC1: Sema7a, PhiP, CD53, CD81, CD9, Ddx6
feats <- list(
c("Sema7a", "Phip", "Cd81", "Cd9", "Ddx6", "Ndrg1")
)
# Create final figure
so_name <- "d2_DC"
gmm_filt <- c(subtype = "CCR7hi Mig cDC1")
plot_args <- list(
str_c("v", seq_along(feats)), # Tab title
feats # Features to plot
)
pwalk(plot_args, ~ {
umap_cols <- feat_cols[seq_along(..2)]
umap_cols <- set_names(umap_cols, ..2)
cat("\n### ", ..1, "\n", sep = "")
create_fig4(
sobj_in = sobjs[[so_name]],
gmm_filt = gmm_filt,
ova_cols = ova_cols_4,
feats = names(umap_cols),
feat_cols = umap_cols,
ref_cols = so_cols[[so_name]],
low_col = c("white", "#fafafa"),
sep_bar_labs = T
) %>%
print()
cat("\n\n---\n\n<br>\n\n<br>\n\n")
})# Features to plot
# cDC2 tbet-: CD53, ywhab
# cDC2 tbet+: siva1, rock2
# CCR7hi migratory cDC2: Insm1, CD200
# CCR7hi migratory cDC1: Ccl22, Dynl12, Eps15, Il21r
feats <- list(
c("Cd53", "Ywhab", "Cdk2ap2", "Sh3bgrl", "Serp1", "Pycard")
)
# Create final figure
so_name <- "d14_DC"
gmm_filt <- c(subtype = "cDC2 Tbet-")
plot_args <- list(
str_c("v", seq_along(feats)), # Tab title
feats # Features to plot
)
pwalk(plot_args, ~ {
umap_cols <- feat_cols[seq_along(..2)]
umap_cols <- set_names(umap_cols, ..2)
cat("\n### ", ..1, "\n", sep = "")
create_fig4(
sobj_in = sobjs[[so_name]],
gmm_filt = gmm_filt,
ova_cols = ova_cols_4,
feats = names(umap_cols),
feat_cols = umap_cols,
ref_cols = so_cols[[so_name]],
low_col = "white",
sep_bar_labs = T
) %>%
print()
cat("\n\n---\n\n<br>\n\n<br>\n\n")
})# Features to plot
# cDC2 tbet-: CD53, ywhab
# cDC2 tbet+: siva1, rock2
# CCR7hi migratory cDC2: Insm1, CD200
# CCR7hi migratory cDC1: Ccl22, Dynl12, Eps15, Il21r
feats <- list(
c("Siva1", "Rock2", "Dram1", "Dck", "Mat2a", "Larp4b")
)
# Create final figure
so_name <- "d14_DC"
gmm_filt <- c(subtype = "cDC2 Tbet+")
plot_args <- list(
str_c("v", seq_along(feats)), # Tab title
feats # Features to plot
)
pwalk(plot_args, ~ {
umap_cols <- feat_cols[seq_along(..2)]
umap_cols <- set_names(umap_cols, ..2)
cat("\n### ", ..1, "\n", sep = "")
create_fig4(
sobj_in = sobjs[[so_name]],
gmm_filt = gmm_filt,
ova_cols = ova_cols_4,
feats = names(umap_cols),
feat_cols = umap_cols,
ref_cols = so_cols[[so_name]],
low_col = "white",
sep_bar_labs = T
) %>%
print()
cat("\n\n---\n\n<br>\n\n<br>\n\n")
})# Features to plot
# cDC2 tbet-: CD53, ywhab
# cDC2 tbet+: siva1, rock2
# CCR7hi migratory cDC2: Insm1, CD200
# CCR7hi migratory cDC1: Ccl22, Dynl12, Eps15, Il21r
feats <- list(
c("Insm1", "Cd200", "Poglut1", "Adora2a", "Nupr1", "Tes")
)
# Create final figure
so_name <- "d14_DC"
gmm_filt <- c(subtype = "CCR7hi Mig cDC2")
plot_args <- list(
str_c("v", seq_along(feats)), # Tab title
feats # Features to plot
)
pwalk(plot_args, ~ {
umap_cols <- feat_cols[seq_along(..2)]
umap_cols <- set_names(umap_cols, ..2)
cat("\n### ", ..1, "\n", sep = "")
create_fig4(
sobj_in = sobjs[[so_name]],
gmm_filt = gmm_filt,
ova_cols = ova_cols_4,
feats = names(umap_cols),
feat_cols = umap_cols,
ref_cols = so_cols[[so_name]],
low_col = "white",
sep_bar_labs = T
) %>%
print()
cat("\n\n---\n\n<br>\n\n<br>\n\n")
})# Features to plot
# cDC2 tbet-: CD53, ywhab
# cDC2 tbet+: siva1, rock2
# CCR7hi migratory cDC2: Insm1, CD200
# CCR7hi migratory cDC1: Ccl22, Dynll2, Eps15, Il21r
feats <- list(
c("Ccl22", "Eps15", "Il21r", "Dynll2", "Ccser2", "Ccng2")
)
# Create final figure
so_name <- "d14_DC"
gmm_filt <- c(subtype = "CCR7hi Mig cDC1")
plot_args <- list(
str_c("v", seq_along(feats)), # Tab title
feats # Features to plot
)
pwalk(plot_args, ~ {
umap_cols <- feat_cols[seq_along(..2)]
umap_cols <- set_names(umap_cols, ..2)
cat("\n### ", ..1, "\n", sep = "")
create_fig4(
sobj_in = sobjs[[so_name]],
gmm_filt = gmm_filt,
ova_cols = ova_cols_4,
feats = names(umap_cols),
feat_cols = umap_cols,
ref_cols = so_cols[[so_name]],
low_col = "white",
sep_bar_labs = T
) %>%
print()
cat("\n\n---\n\n<br>\n\n<br>\n\n")
})# Some genes not found: Dynl12
feats <- list(
c("Cfd", "Car3", "Scd1", "Retn", "Adig", "Chchd10"), # These are top ova high markers
c("Wfdc21", "Hp", "Apoc1", "Adipoq", "Pnpla2", "Plin4"),
c("Acsl1", "Cdo1", "Trf", "Cd36", "Glul", "Chpt1"),
c("Lgals3", "Scp2", "Ghr", "Cidec", "Lipe", "Bnip3")
)
# Create final figure
so_name <- "d14_FRC"
gmm_filt <- c(subtype = "Nr4a1+SC")
plot_args <- list(
str_c("v", seq_along(feats)), # Tab title
feats # Features to plot
)
pwalk(plot_args, ~ {
umap_cols <- feat_cols[seq_along(..2)]
umap_cols <- set_names(umap_cols, ..2)
cat("\n### ", ..1, "\n", sep = "")
create_fig4(
sobj_in = sobjs[[so_name]],
gmm_filt = gmm_filt,
ova_cols = ova_cols_2,
feats = names(umap_cols),
feat_cols = umap_cols,
ref_cols = so_cols[[so_name]],
low_col = "white"
) %>%
print()
cat("\n\n---\n\n<br>\n\n<br>\n\n")
})Clustering for LEC subtype assignment using Xiang et al. reference
# Load reference
load(here(params$sobj_refs$LEC))
so_in <- sobjs[["d14_LEC"]]
# Get subtypes for different resolutions/thresholds
reslns <- c(0.6, 1, 1.4, 1.8, 2.2)
r <- seq(0.5, 0.8, 0.1)
res_df <- expand.grid(reslns, r) %>%
rename(resln = Var1, r_thresh = Var2) %>%
as_tibble()
plot_df <- map2(res_df$resln, res_df$r_thresh, ~ {
res <- so_in %>%
FindClusters(
resolution = .x,
verbose = F
) %>%
clustify(
ref_mat = ref_LEC_xiang,
cluster_col = "seurat_clusters",
threshold = .y
)
res <- res@meta.data %>%
as_tibble(rownames = "cell_id") %>%
mutate(
subtype = if_else(cell_type == "LEC", type.clustify, subtype),
subtype = str_to_title_v2(subtype),
UMAP_1 = UMAP_1.x,
UMAP_2 = UMAP_2.x,
n_clust = str_c(n_distinct(seurat_clusters), " (", .x, ")"),
r = r.clustify,
r_thresh = .y
) %>%
select(
cell_id, orig.ident, seurat_clusters,
cell_type, subtype, n_clust,
r, r_thresh, UMAP_1, UMAP_2
)
res
}) %>%
bind_rows()
# Subtype UMAPs
subtype_gg <- plot_df %>%
plot_features(
feature = "subtype",
pt_outline = 0.5,
pt_size = 0.05,
plot_cols = so_cols[["d14_LEC"]],
split_id = c("n_clust", "r_thresh"),
switch = "y"
) +
blank_theme +
labs(subtitle = "Clustifyr Threshold", y = "Number of Clusters") +
guides(color = outline_guide) +
blank_theme +
theme(
plot.subtitle = element_text(hjust = 0.5),
panel.border = element_rect(fill = NA, color = "black", size = 0.2),
legend.title = element_blank(),
axis.title.y = element_text()
)
# Reference UMAPs
clust_guide <- outline_guide
clust_guide$ncol <- 1
ref_gg <- plot_df %>%
mutate(r_thresh = "Clusters") %>%
unique() %>%
plot_features(
feature = "seurat_clusters",
pt_outline = 0.5,
pt_size = 0.05,
split_id = c("n_clust", "r_thresh"),
switch = "y"
) +
blank_theme +
labs(y = "Number of Clusters") +
guides(fill = clust_guide) +
blank_theme +
theme(
plot.subtitle = element_text(hjust = 0.5),
panel.border = element_rect(fill = NA, color = "black", size = 0.2),
legend.title = element_blank(),
axis.title.y = element_text()
)
# Correlation UMAPs
cor_df <- plot_df %>%
mutate(r_thresh = "Correlation") %>%
unique()
cor_gg <- cor_df %>%
ggplot(aes(UMAP_1, UMAP_2, color = r))
walk(unique(cor_df$seurat_clusters), ~ {
pt_1 <- cor_df %>%
filter(
seurat_clusters == .x,
r <= 0.6
)
pt_2 <- cor_df %>%
filter(
seurat_clusters == .x,
r > 0.6
)
cor_gg <<- cor_gg +
geom_point(data = pt_1, color = "black", size = 0.5) +
geom_point(data = pt_1, color = "white", size = 0.05) +
geom_point(data = pt_2, color = "black", size = 0.5) +
geom_point(data = pt_2, size = 0.05)
})
cor_gg <- cor_gg +
facet_grid(n_clust ~ r_thresh, switch = "y") +
labs(y = "Number of Clusters") +
blank_theme +
theme(
panel.border = element_rect(fill = NA, color = "black", size = 0.2),
legend.title = element_blank(),
axis.title.y = element_text()
)
# Create final figure
plot_grid(
ref_gg, cor_gg, subtype_gg,
rel_widths = c(0.4, 0.4, 1),
nrow = 1,
align = "vh",
axis = "tbrl"
)Correlation between RNA counts and ova counts grouped by cell type
corr_params %>%
pmap(
plot_corr,
x = c("RNA counts (log10)" = "nCount_RNA"),
y = c("ova counts (log10)" = "adt_ovalbumin"),
feat = "subtype",
data_slot = "counts",
pt_size = 0.5
) %>%
plot_grid(
plotlist = .,
ncol = 2,
align = "vh",
axis = "trbl"
)Correlation between RNA counts and ova counts grouped by cell type and subtype
pad <- c(2, 2, 10.5, 2, 10.5, 2)
corr_params %>%
pmap(
plot_corr,
x = c("RNA counts (log10)" = "nCount_RNA"),
y = c("ova counts (log10)" = "adt_ovalbumin"),
feat = "subtype",
split_id = "subtype",
data_slot = "counts",
pt_size = 1,
scales = "free",
ncol = 3
) %>%
map2(pad, ~ {
.x + theme(
legend.position = "none",
strip.text = element_text(size = 14),
plot.margin = unit(c(0.2, 0.2, .y, 0.2), "cm")
)
}) %>%
plot_grid(
plotlist = .,
rel_heights = c(1, 1, 1),
align = "v",
ncol = 2
)